Using updated functions for simulating data and calculating thresholds

New functions:

Possible driver-response relationships

Examples of the types of driver-response relationships sim_data() can produce:

#layout(matrix(c(1, 3, 5, 7, 2, 4, 6, 8), nrow = 4, ncol = 2), widths = c(1, 1), heights = c(1, 1, 1, 1))
#layout.show(n = 8)

par(mfrow = c(4, 2))
layout(matrix(c(1, 3, 5, 7, 2, 4, 6, 8), nrow = 4, ncol = 2), widths = c(1, 1), heights = c(1, 1, 1, 1))
par(mar=c(2, 2, 0, 0), oma = c(2, 2, 0, 0))
# skew v1 (concave down)
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 2, hs_a = NULL, skew_cv = 1, skew_conc = "down", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "skew", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)

# skew v2 (concave up)
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "skew", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1, labels = NA)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)
mtext(side = 2, "Response", outer = T, adj = 0.5)

# sigmoid v1
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "sigmoidal", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)

# sigmoid v2
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 2, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = -1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "sigmoidal", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1, labels = NA)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)

# hockey stick v1
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "hockeystick", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)

# hockey stick v2
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 2, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "hockeystick", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1, labels = NA)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6), labels = NA)
abline(v = control_pars1$thresh_loc)

# step function
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = 3, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "hockeystick", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n")
axis(side = 2, at = c(0, 1, 2, 3), las = 1)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6))
abline(v = control_pars1$thresh_loc)
mtext(side = 1, "Driver", outer = T, adj = 0.5)

# linear
control_pars1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 0, lin_b = 1.5)
fun_test1 <- sim_data(nsim = 1, tmax = 200, fun = "linear", control_pars = control_pars1)
plot(x = fun_test1$driver, fun_test1$response, xlab = NA, ylab = NA, las = 1, yaxt = "n", xaxt = "n", ylim = c(0, 3))
axis(side = 2, at = c(0, 1, 2, 3), las = 1, labels = NA)
axis(side = 1, at = c(0, 1, 2, 3, 4, 5, 6))

#abline(v = control_pars1$thresh_loc)

focal driver-response relationships

Could focus on three example relationships with thresholds and one linear relationship?

Here, each side of the threshold was arbitrarily labeled as “good” (where the response variable is at desired values) and “bad” (where the response variable is at values that want to be avoided)

aside: effect of smoothing on findPeaks function

The original threshold workshop code defined the threshold as the location where the second derivative was furthest from zero, which they calculated by using the findPeaks function in the quantmod package to find maxima and minima in the bootstrapped second derivative and then selected the peak that was furthest from zero (and significantly different from zero). But individual jackknife iterations are noisier than the smoothed quantiles of the bootstrapped derivative used in the workshop code, and the findPeaks function doesn’t seem to handle noisy data well

The following plots show the jackknifed gam predictions and first and second derivatives of an example simulation with a sigmoidal driver-response relationship, low observation error (obs error = 0.1), a long time series length (ts length = 45), and no missing covariate. Each individual curve is the result of a single jackknife iteration (i.e., leaving out one data point when fitting the gam). Blue points are the local maxima and minima detected on that curve by the findPeaks function. Results for different levels of smoothing are shown.

For the sigmoidal relationship, the true second derivative has one local maxima and one local minima, but because the estimated second derivative is noisy, the findPeaks function returns a large number of maxima and minima.

In this case it isn’t really a problem because all of these extra peaks are closer to zero than the true maxima and minima (and the threshold calculation method returns the peaks that are furthest from zero), but it might be an issue in other cases, e.g., if the ends of the curve where most of the noise is are further from zero than the true max/mins. Maybe we should think about adding more criteria to what counts as a local max/min in the findPeaks function?

smoothing = 0.025

smoothing = 0.1 (default in threshold workshop code)

smoothing = 0.2

run simulations

Look at low/high observation error, short/long time series, threshold quantile near and far from center of driver data, and with small/large effect of covariate

Compare jackknifing to original bootstrap approach, and for the jackknifing also look at multiple driver-response relationships

Meta-parameters:

# 2 ts lengths, 2 obs errors, 2 thresh quantiles,2 beta_sd's = 16 combinations, x3 driver response relationships (not including linear)

# number of simulations to run
nsim1 <- 50 # should to 100 but doing less for now so it takes less time
nsimb <- 10 # fewer simulations for bootstrapping

xvals1 <- seq(from = 0, to = 6, by = 0.01)

# parameter combinations
obs_set <- c(0.1, 5)
#obs_set <- c(0.5, 5)
ts_set <- c(15, 45)
quant_set <- c(0.15, 0.5)
#quant_set <- c(0.25, 0.5)
beta_sd_set <- c(0.01, 3)
#beta_sd_set <- c(0.01, 2)
#cov_set <- c(FALSE, TRUE)

parmat <- unname(as.matrix(expand.grid(obs_set, ts_set, quant_set, beta_sd_set)))

#parmat

# control pars for each driver-response relationship
# sigmoid v1
control_pars_sg1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)

# skew v1 (concave down)
control_pars_sk1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 2, hs_a = NULL, skew_cv = 1, skew_conc = "down", sig_k = 1.5, lin_m = 1, lin_b = 0)

# hockey stick v1
control_pars_hs1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)

# linear
control_pars_ln1 <- list(thresh_loc = 3, thresh_loc_sd = 0, y_max = 3, y_min = 0, hs_type = 1, hs_a = NULL, skew_cv = 1, skew_conc = "up", sig_k = 1.5, lin_m = 1, lin_b = 0)

Run the simulations:

# sigmoidal relationship

# jackknifing
tic()
for(p in 1:nrow(parmat)){ #1:nrow(parmat)
  # simulate the data
  driver_pars_p = list(x_min = NULL, x_max = NULL, thresh_quant = parmat[p, 3], x_df = 10, x_sd = 1, uniform = FALSE)
  cov_pars_p <- list(inc_cov = TRUE, beta_mean = 0, beta_sd = parmat[p, 4], beta_sign = 1)
  dt_p <- sim_data(nsim = nsim1, tmax = parmat[p, 2], driver_pars = driver_pars_p, obs_sd = parmat[p, 1], fun = "sigmoidal", control_pars = control_pars_sg1, cov_pars = cov_pars_p) # CHANGE fun and control_pars for each driver-response relationship
  
  # do the jackknifing
  thresh_pj <- jack_thresh(dt_p, xvals1, sig_criteria = T, span = 0.1)
  
  # add the parameter columns
  thresh_pj$obs_error <- rep(parmat[p, 1], length(thresh_pj$sim))
  thresh_pj$ts_length <- rep(parmat[p, 2], length(thresh_pj$sim))
  thresh_pj$thresh_quant <- rep(parmat[p, 3], length(thresh_pj$sim))
  thresh_pj$beta_sd <- rep(parmat[p, 4], length(thresh_pj$sim))
  
  # add column specifying this was jackknifing
  #thresh_pj$samp <- rep("jack", length(thresh_pj$sim))
  
  # save the data frame
  if(p == 1){
    jdf <- thresh_pj
  } else {
    jdf <- rbind(thresh_pj, jdf)
  }
  
}

toc()
## 1077.04 sec elapsed
# save results for this driver-response relationship
sig_jdf <- jdf
sig_jdf$shape <- rep("sigmoidal", length(sig_jdf$sim))

# bootstrapping
tic()
for(p in 1:nrow(parmat)){
  # simulate the data
  driver_pars_p = list(x_min = NULL, x_max = NULL, thresh_quant = parmat[p, 3], x_df = 10, x_sd = 1, uniform = FALSE)
  cov_pars_p <- list(inc_cov = TRUE, beta_mean = 0, beta_sd = parmat[p, 4], beta_sign = 1)
  dt_p <- sim_data(nsim = nsimb, tmax = parmat[p, 2], driver_pars = driver_pars_p, obs_sd = parmat[p, 1], fun = "sigmoidal", control_pars = control_pars_sg1, cov_pars = cov_pars_p) # CHANGE fun and control_pars for each driver-response relationship
  
  # do the bootstrapping
  thresh_bj <- boot_thresh(dt_p, xvals1, boot_nobs = 0.75*parmat[p, 2], span = 0.1)
  
  # add the parameter columns
  thresh_bj$obs_error <- rep(parmat[p, 1], length(thresh_bj$sim))
  thresh_bj$ts_length <- rep(parmat[p, 2], length(thresh_bj$sim))
  thresh_bj$thresh_quant <- rep(parmat[p, 3], length(thresh_bj$sim))
  thresh_bj$beta_sd <- rep(parmat[p, 4], length(thresh_bj$sim))
  
  # add column specifying this was bootstrapping
  #thresh_bj$samp <- rep("boot", length(thresh_bj$sim))
  
  
  # save the data frame
  if(p == 1){
    bdf <- thresh_bj
  } else {
    bdf <- rbind(thresh_bj, bdf)
  }
  
}


toc()
## 2330.923 sec elapsed
#View(jdf)
#View(bdf)

sig_bdf <- bdf
sig_bdf$shape <- rep("sigmoidal", length(sig_bdf$sim))


# jackknifing for skewed relationship
tic()
for(p in 1:nrow(parmat)){ #1:nrow(parmat)
  # simulate the data
  driver_pars_p = list(x_min = NULL, x_max = NULL, thresh_quant = parmat[p, 3], x_df = 10, x_sd = 1, uniform = FALSE)
  cov_pars_p <- list(inc_cov = TRUE, beta_mean = 0, beta_sd = parmat[p, 4], beta_sign = 1)
  dt_p <- sim_data(nsim = nsim1, tmax = parmat[p, 2], driver_pars = driver_pars_p, obs_sd = parmat[p, 1], fun = "skew", control_pars = control_pars_sk1, cov_pars = cov_pars_p) # CHANGE fun and control_pars for each driver-response relationship
  
  # do the jackknifing
  thresh_pj <- jack_thresh(dt_p, xvals1, sig_criteria = T, span = 0.1)
  
  # add the parameter columns
  thresh_pj$obs_error <- rep(parmat[p, 1], length(thresh_pj$sim))
  thresh_pj$ts_length <- rep(parmat[p, 2], length(thresh_pj$sim))
  thresh_pj$thresh_quant <- rep(parmat[p, 3], length(thresh_pj$sim))
  thresh_pj$beta_sd <- rep(parmat[p, 4], length(thresh_pj$sim))
  
  # add column specifying this was jackknifing
  #thresh_pj$samp <- rep("jack", length(thresh_pj$sim))
  
  # save the data frame
  if(p == 1){
    jdf <- thresh_pj
  } else {
    jdf <- rbind(thresh_pj, jdf)
  }
  
  
}


toc()
## 984.831 sec elapsed
skew_jdf <- jdf
skew_jdf$shape <- rep("skew", length(skew_jdf$sim))

# jackknifing for hockeystick relationship

tic()
for(p in 1:nrow(parmat)){ #1:nrow(parmat)
  # simulate the data
  driver_pars_p = list(x_min = NULL, x_max = NULL, thresh_quant = parmat[p, 3], x_df = 10, x_sd = 1, uniform = FALSE)
  cov_pars_p <- list(inc_cov = TRUE, beta_mean = 0, beta_sd = parmat[p, 4], beta_sign = 1)
  dt_p <- sim_data(nsim = nsim1, tmax = parmat[p, 2], driver_pars = driver_pars_p, obs_sd = parmat[p, 1], fun = "hockeystick", control_pars = control_pars_hs1, cov_pars = cov_pars_p) # CHANGE fun and control_pars for each driver-response relationship
  
  # do the jackknifing
  thresh_pj <- jack_thresh(dt_p, xvals1, sig_criteria = T, span = 0.1)
  
  # add the parameter columns
  thresh_pj$obs_error <- rep(parmat[p, 1], length(thresh_pj$sim))
  thresh_pj$ts_length <- rep(parmat[p, 2], length(thresh_pj$sim))
  thresh_pj$thresh_quant <- rep(parmat[p, 3], length(thresh_pj$sim))
  thresh_pj$beta_sd <- rep(parmat[p, 4], length(thresh_pj$sim))
  
  # add column specifying this was jackknifing
  #thresh_pj$samp <- rep("jack", length(thresh_pj$sim))
  
  # save the data frame
  if(p == 1){
    jdf <- thresh_pj
  } else {
    jdf <- rbind(thresh_pj, jdf)
  }
  
  
}


toc()
## 1006.999 sec elapsed
hs_jdf <- jdf
hs_jdf$shape <- rep("hockeystick", length(hs_jdf$sim))

compare jackknife and bootstrap (sigmoidal only, nsim = 10)

number of thresholds detected

Plot the difference in the number of thresholds detected each simulation (for bootstrap, this is either 0 or 1, while for jackknife this is the average of the number of thresholds detected each jackknife iteration)

Positive values mean that on average jackknifing detected more thresholds than bootstrapping while negative values mean bootstrapping detected more thresholds than jackknifing

In this plot (and all boxplots below), each panel is a different combination of 1) observation error, 2) time series length, and 3) standard deviation in the effect of the covariate on the response (beta_sd, where beta ~ abs(N(0, beta_sd)) is the slope of the relationship between the covariate and the response). Columns 1 and 3: obs error = 0.1. Columns 2 and 4: obs error = 5. Columns 1-2: time series length = 15. Columns 3-4: time series length = 45. Top row: beta_sd = 0.01. Bottom row: beta_sd = 3.

Within each panel, the results are grouped based on thresh_quant, which is the quantile of the driver data that the threshold falls in (i.e., quant = 0.15 means the true threshold is equal to the 15% quantile of the driver values, and quant = 0.5 means the true threshold is eaual to the median of the driver values), and the subgroupings represent the the method used to calculate the threshold. Purple = local max/min in the second derivative that has the largest absolute value, green = local min in second deriv that is furthest from zero, orange = location where the first derivative crosses zero (a max/min in the response), light blue = location where the second derivative crosses zero (an inflection point)

magnitude of difference from true threshold

Plot the difference in the absolute value of the difference between the threshold estimate and the true threshold value, i.e., abs(jack_est - true) - abs(boot_est - true)

Positive values mean the bootstrapped estimate was closer to the true value, i.e., abs(jack_est - true) > abs(boot_est - true), while negative values mean the jackknifed estimate was closer.

jackknifing results (nsim = 50, sigmoidal, skew, and hockeystick relationships)

average number of thresholds detected

Plot the average number of thresholds detected across the jackknife iterations each simulation

sigmoidal relationship

skew relationship

hockeystick relationship

difference between mean estimate and true threshold

Plot the difference between the mean threshold estimate (i.e., average of the estimates across jackknife iterations in a simulation) and the true value of the threshold

sigmoidal relationship

skew relationship

hockeystick relationship

RMSE of estimate

Plot the RMSE of the threshold estimates from the jackknifing each simulation

sigmoidal relationship

skew relationship

hockeystick relationship

repeat w/o significance criteria in threshold calculations?